library(here)
library(tidyverse)
theme_set(theme_light())

library(scuttle)
library(iCOBRA)

library(kableExtra)
library(DDCompanion)

Setup

## Directory setup
here_root <- "benchmarks/lupus-n_patients"
here::i_am(file.path(here_root, "analysis/lupus-n_patients-sim-results.Rmd"))
#> here() starts at /kyukon/data/gent/vo/000/gvo00063/Jeroen/DD_project_v2

res_dir <- here::here(here_root, "results")
fig_dir <- here::here(here_root, "figures")
  • Using n_patients: 10 20 30
  • Using methods: edgeR_NB, edgeR_QP, edgeR_NB_optim, edgeR_QP_optim, bGLM, qbGLM, qbGLM_offset, qbGLM_offset_squeeze
  • Using celltype: B_mem
  • Using prop_DE: 0.05

Load results

# TODO: also add all-patients results for comparison?


res_files <- map(n_patients, ~ get_sim_res_files(
    dataset = "lupus-n_patients",
    methods = methods,
    prop_DE = prop_DE,
    celltype = celltype,
    datatype = "pb",
    n_patients = .x
)) %>%
    set_names(paste0("n_patients_", n_patients))

#res_list <- map_depth(res_files, 2, readRDS)
res_list <- lapply(res_files, function(element){
        lapply(element, readRDS)
})
str(res_list, max.level = 3)
#> List of 3
#>  $ n_patients_10:List of 8
#>   ..$ edgeR_NB            :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ edgeR_QP            :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ edgeR_NB_optim      :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ edgeR_QP_optim      :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ bGLM                :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM               :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM_offset        :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM_offset_squeeze:List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>  $ n_patients_20:List of 8
#>   ..$ edgeR_NB            :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ edgeR_QP            :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ edgeR_NB_optim      :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ edgeR_QP_optim      :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ bGLM                :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM               :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM_offset        :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM_offset_squeeze:List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>  $ n_patients_30:List of 8
#>   ..$ edgeR_NB            :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ edgeR_QP            :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ edgeR_NB_optim      :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ edgeR_QP_optim      :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ bGLM                :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM               :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM_offset        :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM_offset_squeeze:List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2

Load SCE objects

data_files <- map(n_patients, ~ get_SCE_files(
    dataset = "lupus-n_patients", which = "sim_replicates",
    celltype = celltype, n_patients = .x, prop_DE = prop_DE
)) %>%
    set_names(paste0("n_patients_", n_patients))
sce_objects <- map(data_files, readRDS)
str(sce_objects, max.level = 3)
#> List of 3
#>  $ n_patients_10:List of 5
#>   ..$ replicate_1:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_2:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_3:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_4:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_5:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>  $ n_patients_20:List of 5
#>   ..$ replicate_1:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_2:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_3:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_4:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_5:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>  $ n_patients_30:List of 5
#>   ..$ replicate_1:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_2:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_3:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_4:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_5:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots

Data overview

  • The results were generated from 5 mock replicates
  • Each replicate was generated by randomly splitting the subjects in two mock groups
  • No sub-sampling of cells per patient was performed for this data
  • DE was introduced by randomly swapping genes in one of the mock groups
  • The number of patients was randomly sub-sampled stratified by mock group

Note that for n_patients 10 and 20, the selected patients vary randomly between the replicates, which is why the nrows and ncols are not identical. For the n_patients 30, however, no random sub-sampling was done because I could just use all patients from 2 out of the original 3 batches from the Control samples. That’s why the ncols are identical for those subsets.

The mock group assignment of each subject also varies across replicates.

map_dfr(sce_objects,
    ~ map_dfr(.x, function(x) c(nrows = nrow(x), ncols = ncol(x)),
        .id = "replicate"
    ),
    .id = "n_patients"
)

Subjects are divided across mock groups as follows:

n_patients = 10

replicate_1

A B
IGTB1762_IGTB1762 1 0
IGTB1793_IGTB1793 0 1
IGTB1815_IGTB1815 0 1
IGTB1828_IGTB1828 1 0
IGTB1871_IGTB1871 0 1
IGTB1895_IGTB1895 1 0
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 1 0
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 0 1

replicate_2

A B
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 0 1
IGTB1828_IGTB1828 1 0
IGTB1840_IGTB1840 0 1
IGTB1871_IGTB1871 1 0
IGTB1895_IGTB1895 0 1
IGTB1901_IGTB1901 1 0
IGTB1952_IGTB1952 1 0
IGTB1996_IGTB1996 0 1
IGTB2007_IGTB2007 1 0

replicate_3

A B
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 1 0
IGTB1793_IGTB1793 1 0
IGTB1815_IGTB1815 0 1
IGTB1895_IGTB1895 1 0
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 1 0
IGTB1996_IGTB1996 0 1
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 0 1

replicate_4

A B
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 1 0
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 0 1
IGTB1895_IGTB1895 0 1
IGTB1901_IGTB1901 1 0
IGTB1952_IGTB1952 0 1
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 1 0
IGTB2065_IGTB2065 1 0

replicate_5

A B
IGTB1762_IGTB1762 1 0
IGTB1789_IGTB1789 1 0
IGTB1793_IGTB1793 0 1
IGTB1815_IGTB1815 0 1
IGTB1828_IGTB1828 0 1
IGTB1840_IGTB1840 0 1
IGTB1901_IGTB1901 1 0
IGTB1996_IGTB1996 0 1
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 1 0

n_patients = 20

replicate_1

A B
IGTB141_IGTB141 1 0
IGTB143_IGTB143 1 0
IGTB195_IGTB195 0 1
IGTB469_IGTB469 1 0
IGTB498_IGTB498 1 0
IGTB508_IGTB508 1 0
IGTB514_IGTB514 1 0
IGTB826_IGTB826 0 1
IGTB1506_IGTB1506 0 1
IGTB1762_IGTB1762 1 0
IGTB1789_IGTB1789 0 1
IGTB1793_IGTB1793 0 1
IGTB1840_IGTB1840 0 1
IGTB1871_IGTB1871 0 1
IGTB1895_IGTB1895 1 0
IGTB1952_IGTB1952 1 0
IGTB1966_IGTB1966 0 1
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 1 0
IGTB2065_IGTB2065 0 1

replicate_2

A B
IGTB141_IGTB141 1 0
IGTB143_IGTB143 0 1
IGTB195_IGTB195 1 0
IGTB469_IGTB469 1 0
IGTB508_IGTB508 0 1
IGTB514_IGTB514 0 1
IGTB670_IGTB670 0 1
IGTB826_IGTB826 1 0
IGTB1506_IGTB1506 0 1
IGTB1575_IGTB1575 1 0
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 0 1
IGTB1793_IGTB1793 0 1
IGTB1840_IGTB1840 0 1
IGTB1871_IGTB1871 1 0
IGTB1901_IGTB1901 1 0
IGTB1966_IGTB1966 0 1
IGTB1982_IGTB1982 1 0
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 1 0

replicate_3

A B
IGTB143_IGTB143 0 1
IGTB195_IGTB195 0 1
IGTB498_IGTB498 0 1
IGTB508_IGTB508 0 1
IGTB514_IGTB514 0 1
IGTB645_IGTB645 0 1
IGTB670_IGTB670 1 0
IGTB826_IGTB826 1 0
IGTB1372_IGTB1372 1 0
IGTB1539_IGTB1539 1 0
IGTB1575_IGTB1575 1 0
IGTB1789_IGTB1789 1 0
IGTB1815_IGTB1815 0 1
IGTB1828_IGTB1828 1 0
IGTB1901_IGTB1901 1 0
IGTB1906_IGTB1906 1 0
IGTB1952_IGTB1952 0 1
IGTB1982_IGTB1982 0 1
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 0 1

replicate_4

A B
IGTB143_IGTB143 1 0
IGTB469_IGTB469 0 1
IGTB498_IGTB498 0 1
IGTB670_IGTB670 1 0
IGTB1372_IGTB1372 0 1
IGTB1506_IGTB1506 0 1
IGTB1539_IGTB1539 1 0
IGTB1575_IGTB1575 1 0
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 1 0
IGTB1840_IGTB1840 0 1
IGTB1871_IGTB1871 0 1
IGTB1895_IGTB1895 0 1
IGTB1901_IGTB1901 1 0
IGTB1906_IGTB1906 1 0
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 1 0
IGTB1982_IGTB1982 0 1
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 1 0

replicate_5

A B
IGTB141_IGTB141 0 1
IGTB143_IGTB143 0 1
IGTB498_IGTB498 1 0
IGTB508_IGTB508 0 1
IGTB514_IGTB514 1 0
IGTB645_IGTB645 0 1
IGTB1506_IGTB1506 0 1
IGTB1539_IGTB1539 1 0
IGTB1575_IGTB1575 1 0
IGTB1762_IGTB1762 1 0
IGTB1789_IGTB1789 1 0
IGTB1815_IGTB1815 0 1
IGTB1840_IGTB1840 0 1
IGTB1895_IGTB1895 1 0
IGTB1901_IGTB1901 1 0
IGTB1906_IGTB1906 0 1
IGTB1966_IGTB1966 1 0
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 0 1
IGTB2065_IGTB2065 1 0

n_patients = 30

replicate_1

A B
IGTB141_IGTB141 1 0
IGTB143_IGTB143 1 0
IGTB195_IGTB195 0 1
IGTB469_IGTB469 1 0
IGTB498_IGTB498 1 0
IGTB508_IGTB508 1 0
IGTB514_IGTB514 1 0
IGTB645_IGTB645 1 0
IGTB670_IGTB670 0 1
IGTB826_IGTB826 0 1
IGTB1372_IGTB1372 0 1
IGTB1506_IGTB1506 0 1
IGTB1539_IGTB1539 1 0
IGTB1575_IGTB1575 0 1
IGTB1762_IGTB1762 1 0
IGTB1789_IGTB1789 0 1
IGTB1793_IGTB1793 0 1
IGTB1815_IGTB1815 0 1
IGTB1828_IGTB1828 1 0
IGTB1840_IGTB1840 0 1
IGTB1871_IGTB1871 0 1
IGTB1895_IGTB1895 1 0
IGTB1901_IGTB1901 1 0
IGTB1906_IGTB1906 0 1
IGTB1952_IGTB1952 1 0
IGTB1966_IGTB1966 0 1
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 1 0
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 0 1

replicate_2

A B
IGTB141_IGTB141 1 0
IGTB143_IGTB143 0 1
IGTB195_IGTB195 1 0
IGTB469_IGTB469 1 0
IGTB498_IGTB498 0 1
IGTB508_IGTB508 0 1
IGTB514_IGTB514 0 1
IGTB645_IGTB645 0 1
IGTB670_IGTB670 0 1
IGTB826_IGTB826 1 0
IGTB1372_IGTB1372 0 1
IGTB1506_IGTB1506 0 1
IGTB1539_IGTB1539 1 0
IGTB1575_IGTB1575 1 0
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 0 1
IGTB1793_IGTB1793 0 1
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 1 0
IGTB1840_IGTB1840 0 1
IGTB1871_IGTB1871 1 0
IGTB1895_IGTB1895 0 1
IGTB1901_IGTB1901 1 0
IGTB1906_IGTB1906 1 0
IGTB1952_IGTB1952 1 0
IGTB1966_IGTB1966 0 1
IGTB1982_IGTB1982 1 0
IGTB1996_IGTB1996 0 1
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 1 0

replicate_3

A B
IGTB141_IGTB141 0 1
IGTB143_IGTB143 0 1
IGTB195_IGTB195 0 1
IGTB469_IGTB469 1 0
IGTB498_IGTB498 0 1
IGTB508_IGTB508 0 1
IGTB514_IGTB514 0 1
IGTB645_IGTB645 0 1
IGTB670_IGTB670 1 0
IGTB826_IGTB826 1 0
IGTB1372_IGTB1372 1 0
IGTB1506_IGTB1506 1 0
IGTB1539_IGTB1539 1 0
IGTB1575_IGTB1575 1 0
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 1 0
IGTB1793_IGTB1793 1 0
IGTB1815_IGTB1815 0 1
IGTB1828_IGTB1828 1 0
IGTB1840_IGTB1840 1 0
IGTB1871_IGTB1871 0 1
IGTB1895_IGTB1895 1 0
IGTB1901_IGTB1901 1 0
IGTB1906_IGTB1906 1 0
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 1 0
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 0 1
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 0 1

replicate_4

A B
IGTB141_IGTB141 0 1
IGTB143_IGTB143 1 0
IGTB195_IGTB195 1 0
IGTB469_IGTB469 0 1
IGTB498_IGTB498 0 1
IGTB508_IGTB508 0 1
IGTB514_IGTB514 1 0
IGTB645_IGTB645 0 1
IGTB670_IGTB670 1 0
IGTB826_IGTB826 0 1
IGTB1372_IGTB1372 0 1
IGTB1506_IGTB1506 0 1
IGTB1539_IGTB1539 1 0
IGTB1575_IGTB1575 1 0
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 1 0
IGTB1793_IGTB1793 0 1
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 0 1
IGTB1840_IGTB1840 0 1
IGTB1871_IGTB1871 0 1
IGTB1895_IGTB1895 0 1
IGTB1901_IGTB1901 1 0
IGTB1906_IGTB1906 1 0
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 1 0
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 1 0
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 1 0

replicate_5

A B
IGTB141_IGTB141 0 1
IGTB143_IGTB143 0 1
IGTB195_IGTB195 0 1
IGTB469_IGTB469 1 0
IGTB498_IGTB498 1 0
IGTB508_IGTB508 0 1
IGTB514_IGTB514 1 0
IGTB645_IGTB645 0 1
IGTB670_IGTB670 1 0
IGTB826_IGTB826 1 0
IGTB1372_IGTB1372 1 0
IGTB1506_IGTB1506 0 1
IGTB1539_IGTB1539 1 0
IGTB1575_IGTB1575 1 0
IGTB1762_IGTB1762 1 0
IGTB1789_IGTB1789 1 0
IGTB1793_IGTB1793 0 1
IGTB1815_IGTB1815 0 1
IGTB1828_IGTB1828 0 1
IGTB1840_IGTB1840 0 1
IGTB1871_IGTB1871 0 1
IGTB1895_IGTB1895 1 0
IGTB1901_IGTB1901 1 0
IGTB1906_IGTB1906 0 1
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 1 0
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 0 1
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 1 0

The number of DE and non-DE genes per replicate:

map(sce_objects, ~ map_dfr(.x, ~ table(rowData(.x)$is_DE), .id = "replicate"))
#> $n_patients_10
#> # A tibble: 5 × 3
#>   replicate   `FALSE`     `TRUE`     
#>   <chr>       <table[1d]> <table[1d]>
#> 1 replicate_1 4546        239        
#> 2 replicate_2 4535        240        
#> 3 replicate_3 4539        241        
#> 4 replicate_4 4545        240        
#> 5 replicate_5 4540        240        
#> 
#> $n_patients_20
#> # A tibble: 5 × 3
#>   replicate   `FALSE`     `TRUE`     
#>   <chr>       <table[1d]> <table[1d]>
#> 1 replicate_1 4415        234        
#> 2 replicate_2 4393        235        
#> 3 replicate_3 4214        239        
#> 4 replicate_4 4510        239        
#> 5 replicate_5 4409        239        
#> 
#> $n_patients_30
#> # A tibble: 5 × 3
#>   replicate   `FALSE`     `TRUE`     
#>   <chr>       <table[1d]> <table[1d]>
#> 1 replicate_1 4412        236        
#> 2 replicate_2 4407        237        
#> 3 replicate_3 4406        240        
#> 4 replicate_4 4409        238        
#> 5 replicate_5 4411        239

t-SNE plots

tSNE_plots <- map(sce_objects, function(x) {
    p <- imap(x, function(sce, name) {
        scater::plotTSNE(sce, colour_by = "mock_group") +
            ggtitle(name)
    })
    patchwork::wrap_plots(p, ncol = 3, guides = "collect")
})

n_patients = 10

n_patients = 20

n_patients = 30

Extract results of interest

Runtimes

## Get runtimes for each celltype
runtimes <- map_dfr(res_list,
    ~ map_dfr(.x, get_runtimes, depth = 1, .id = "method"),
    .id = "n_patients"
) %>%
    mutate(n_patients = as.numeric(sub("n_patients_", "", n_patients)))

P-values

res_tables <- map_depth(res_list, 2, get_aggregated_rep_tables, depth = 1)
res_tables <- map(res_tables, ~ combine_tables(.x, .id = "method"))

Visualize results

Run times

ggplot(runtimes, aes(n_patients, time, col = method)) +
    geom_jitter(width = 0.5, alpha = 0.6) +
    geom_smooth(se = FALSE, method = "lm", formula = y ~ x) +
    labs(x = "Number of patients", y = "Time (seconds)", color = NULL) +
    scale_x_continuous(breaks = unique(runtimes$n_patients), minor_breaks = NULL)

P-value distributions for non-DE genes

non_de_res <- map2(sce_objects, res_tables, function(sce_list, res) {
    by_rep <- split(res, res$replicate)
    out <- map2(sce_list, by_rep, function(sce, tbl) {
        ## Select only non-DE genes
        non_de <- rownames(sce)[!rowData(sce)$is_DE]
        tbl[tbl$gene %in% non_de, ]
    })
    bind_rows(out, .id = "replicate")
})
non_de_pval_figs <- map(non_de_res, ~ pval_hist(.x))

n_patients_10

n_patients_20

n_patients_30

Performance evaluation with iCOBRA

Prepare Data

P-values for missing genes are set to 1.

cobra_data <- map2(res_tables, sce_objects, function(res_table, sce_list) {
    ## Split up results per replicate
    res_per_replicate <- split(res_table, res_table$replicate)
    map2(res_per_replicate, sce_list, prepare_COBRAData, replace_missing = TRUE)
})

cobra_perf <- map_depth(cobra_data, 2, calculate_performance, binary_truth = "status")

cobra_objects <- map_depth(cobra_perf, 2, prepare_data_for_plot)

FDR-TPR curves

fdr_tpr_plots <- map(cobra_objects, function(cobra_list) {
    p <- imap(cobra_list, ~ plot_fdrtprcurve(.x, title = .y))
    patchwork::wrap_plots(p, ncol = 3, guides = "collect")
})

n_patients_10

n_patients_20

n_patients_30

FDR-TPR plots averaged across replicate

## Working points
fdr_tpr_points <- map(cobra_objects, combine_fdrtpr_tables) %>%
    bind_rows(.id = "n_patients") %>%
    mutate(n_patients = as.numeric(sub("n_patients_", "", n_patients)))

fdr_tpr_points_averaged <- fdr_tpr_points %>%
    group_by(thr, method, n_patients) %>%
    summarize(across(c(FDR, TPR), mean), .groups = "keep")
plot_fdrtpr_points(fdr_tpr_points_averaged) +
    facet_wrap(vars(n_patients), labeller = "label_both") +
    xlim(c(0, 0.45)) + ylim(c(0, 1))
#> Warning: Removed 1 rows containing missing values (`geom_point()`).

FDR control at threshold 0.05

use_thr <- 0.05
fdr_data <- filter(fdr_tpr_points, thr == use_thr)

plot_fdr_control(fdr_data, use_thr = use_thr) +
    ylim(c(0, 1)) +
    facet_wrap(vars(n_patients), labeller = "label_both")

Session info

Session info
#> [1] "2023-06-03 00:55:02 CEST"
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.2.0 (2022-04-22)
#>  os       Red Hat Enterprise Linux 8.6 (Ootpa)
#>  system   x86_64, linux-gnu
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Europe/Paris
#>  date     2023-06-03
#>  pandoc   2.13 @ /apps/gent/RHEL8/zen2-ib/software/Pandoc/2.13/bin/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  ! package              * version   date (UTC) lib source
#>  P argparse             * 2.2.2     2023-02-15 [?] CRAN (R 4.2.0)
#>  P beachmat               2.14.2    2023-04-07 [?] Bioconductor
#>  P beeswarm               0.4.0     2021-06-01 [?] CRAN (R 4.2.0)
#>  P Biobase              * 2.58.0    2022-11-01 [?] Bioconductor
#>  P BiocGenerics         * 0.44.0    2022-11-01 [?] Bioconductor
#>  P BiocManager            1.30.20   2023-02-24 [?] CRAN (R 4.2.0)
#>  P BiocNeighbors          1.16.0    2022-11-01 [?] Bioconductor
#>  P BiocParallel           1.32.6    2023-03-17 [?] Bioconductor
#>  P BiocSingular           1.14.0    2022-11-01 [?] Bioconductor
#>  P bitops                 1.0-7     2021-04-24 [?] CRAN (R 4.2.0)
#>  P bluster                1.8.0     2022-11-01 [?] Bioconductor
#>  P bslib                  0.4.2     2022-12-16 [?] CRAN (R 4.2.0)
#>  P cachem                 1.0.8     2023-05-01 [?] CRAN (R 4.2.0)
#>  P cli                    3.6.1     2023-03-23 [?] CRAN (R 4.2.0)
#>  P cluster                2.1.4     2022-08-22 [?] CRAN (R 4.2.0)
#>  P codetools              0.2-19    2023-02-01 [?] CRAN (R 4.2.0)
#>  P colorspace             2.1-0     2023-01-23 [?] CRAN (R 4.2.0)
#>  P cowplot                1.1.1     2020-12-30 [?] CRAN (R 4.2.0)
#>    DDCompanion          * 0.1.15    2023-05-22 [1] local (./package)
#>  P DelayedArray           0.24.0    2022-11-01 [?] Bioconductor
#>  P DelayedMatrixStats     1.20.0    2022-11-01 [?] Bioconductor
#>  P digest                 0.6.31    2022-12-11 [?] CRAN (R 4.2.0)
#>  P dplyr                * 1.1.2     2023-04-20 [?] CRAN (R 4.2.0)
#>  P dqrng                  0.3.0     2021-05-01 [?] CRAN (R 4.2.0)
#>  P DT                     0.28      2023-05-18 [?] CRAN (R 4.2.0)
#>  P edgeR                  3.40.2    2023-01-19 [?] Bioconductor
#>  P ellipsis               0.3.2     2021-04-29 [?] CRAN (R 4.2.0)
#>  P evaluate               0.21      2023-05-05 [?] CRAN (R 4.2.0)
#>  P fansi                  1.0.4     2023-01-22 [?] CRAN (R 4.2.0)
#>  P farver                 2.1.1     2022-07-06 [?] CRAN (R 4.2.0)
#>  P fastmap                1.1.1     2023-02-24 [?] CRAN (R 4.2.0)
#>  P findpython             1.0.8     2023-03-14 [?] CRAN (R 4.2.0)
#>  P forcats              * 1.0.0     2023-01-29 [?] CRAN (R 4.2.0)
#>  P generics               0.1.3     2022-07-05 [?] CRAN (R 4.2.0)
#>  P GenomeInfoDb         * 1.34.9    2023-02-02 [?] Bioconductor
#>  P GenomeInfoDbData       1.2.9     2023-05-20 [?] Bioconductor
#>  P GenomicRanges        * 1.50.2    2022-12-16 [?] Bioconductor
#>  P ggbeeswarm             0.7.2     2023-04-29 [?] CRAN (R 4.2.0)
#>  P ggplot2              * 3.4.2     2023-04-03 [?] CRAN (R 4.2.0)
#>  P ggrepel                0.9.3     2023-02-03 [?] CRAN (R 4.2.0)
#>  P glue                   1.6.2     2022-02-24 [?] CRAN (R 4.2.0)
#>  P gridExtra              2.3       2017-09-09 [?] CRAN (R 4.2.0)
#>  P gtable                 0.3.3     2023-03-21 [?] CRAN (R 4.2.0)
#>  P here                 * 1.0.1     2020-12-13 [?] CRAN (R 4.2.0)
#>  P highr                  0.10      2022-12-22 [?] CRAN (R 4.2.0)
#>  P hms                    1.1.3     2023-03-21 [?] CRAN (R 4.2.0)
#>  P htmltools              0.5.5     2023-03-23 [?] CRAN (R 4.2.0)
#>  P htmlwidgets            1.6.2     2023-03-17 [?] CRAN (R 4.2.0)
#>  P httpuv                 1.6.11    2023-05-11 [?] CRAN (R 4.2.0)
#>  P httr                   1.4.6     2023-05-08 [?] CRAN (R 4.2.0)
#>  P iCOBRA               * 1.26.0    2022-11-01 [?] Bioconductor
#>  P igraph                 1.4.3     2023-05-22 [?] CRAN (R 4.2.0)
#>  P IRanges              * 2.32.0    2022-11-01 [?] Bioconductor
#>  P irlba                  2.3.5.1   2022-10-03 [?] CRAN (R 4.2.0)
#>  P jquerylib              0.1.4     2021-04-26 [?] CRAN (R 4.2.0)
#>  P jsonlite               1.8.4     2022-12-06 [?] CRAN (R 4.2.0)
#>  P kableExtra           * 1.3.4     2021-02-20 [?] CRAN (R 4.2.0)
#>  P knitr                  1.42      2023-01-25 [?] CRAN (R 4.2.0)
#>  P labeling               0.4.2     2020-10-20 [?] CRAN (R 4.2.0)
#>  P later                  1.3.1     2023-05-02 [?] CRAN (R 4.2.0)
#>  P lattice                0.21-8    2023-04-05 [?] CRAN (R 4.2.0)
#>  P lifecycle              1.0.3     2022-10-07 [?] CRAN (R 4.2.0)
#>  P limma                  3.54.2    2023-02-28 [?] Bioconductor
#>  P locfit                 1.5-9.7   2023-01-02 [?] CRAN (R 4.2.0)
#>  P lubridate            * 1.9.2     2023-02-10 [?] CRAN (R 4.2.0)
#>  P magrittr               2.0.3     2022-03-30 [?] CRAN (R 4.2.0)
#>  P Matrix                 1.5-4.1   2023-05-18 [?] CRAN (R 4.2.0)
#>  P MatrixGenerics       * 1.10.0    2022-11-01 [?] Bioconductor
#>  P matrixStats          * 0.63.0    2022-11-18 [?] CRAN (R 4.2.0)
#>  P metapod                1.6.0     2022-11-01 [?] Bioconductor
#>  P mgcv                   1.8-42    2023-03-02 [?] CRAN (R 4.2.0)
#>  P mime                   0.12      2021-09-28 [?] CRAN (R 4.2.0)
#>  P munsell                0.5.0     2018-06-12 [?] CRAN (R 4.2.0)
#>  P nlme                   3.1-162   2023-01-31 [?] CRAN (R 4.2.0)
#>  P patchwork              1.1.2     2022-08-19 [?] CRAN (R 4.2.0)
#>  P pillar                 1.9.0     2023-03-22 [?] CRAN (R 4.2.0)
#>  P pkgconfig              2.0.3     2019-09-22 [?] CRAN (R 4.2.0)
#>  P plyr                   1.8.8     2022-11-11 [?] CRAN (R 4.2.0)
#>  P promises               1.2.0.1   2021-02-11 [?] CRAN (R 4.2.0)
#>  P purrr                * 1.0.1     2023-01-10 [?] CRAN (R 4.2.0)
#>  P R6                     2.5.1     2021-08-19 [?] CRAN (R 4.2.0)
#>  P Rcpp                   1.0.10    2023-01-22 [?] CRAN (R 4.2.0)
#>  P RCurl                  1.98-1.12 2023-03-27 [?] CRAN (R 4.2.0)
#>  P readr                * 2.1.4     2023-02-10 [?] CRAN (R 4.2.0)
#>    renv                   0.17.3    2023-04-06 [1] CRAN (R 4.2.0)
#>  P reshape2               1.4.4     2020-04-09 [?] CRAN (R 4.2.0)
#>  P rlang                  1.1.1     2023-04-28 [?] CRAN (R 4.2.0)
#>  P rmarkdown              2.21      2023-03-26 [?] CRAN (R 4.2.0)
#>  P ROCR                   1.0-11    2020-05-02 [?] CRAN (R 4.2.0)
#>  P rprojroot              2.0.3     2022-04-02 [?] CRAN (R 4.2.0)
#>  P rstudioapi             0.14      2022-08-22 [?] CRAN (R 4.2.0)
#>  P rsvd                   1.0.5     2021-04-16 [?] CRAN (R 4.2.0)
#>  P rvest                  1.0.3     2022-08-19 [?] CRAN (R 4.2.0)
#>  P S4Vectors            * 0.36.2    2023-02-26 [?] Bioconductor
#>  P sass                   0.4.6     2023-05-03 [?] CRAN (R 4.2.0)
#>  P ScaledMatrix           1.6.0     2022-11-01 [?] Bioconductor
#>  P scales                 1.2.1     2022-08-20 [?] CRAN (R 4.2.0)
#>  P scater                 1.26.1    2022-11-13 [?] Bioconductor
#>  P scran                  1.26.2    2023-01-19 [?] Bioconductor
#>  P scuttle              * 1.8.4     2023-01-19 [?] Bioconductor
#>  P sessioninfo            1.2.2     2021-12-06 [?] CRAN (R 4.2.0)
#>  P shiny                  1.7.4     2022-12-15 [?] CRAN (R 4.2.0)
#>  P shinyBS                0.61.1    2022-04-17 [?] CRAN (R 4.2.0)
#>  P shinydashboard         0.7.2     2021-09-30 [?] CRAN (R 4.2.0)
#>  P SingleCellExperiment * 1.20.1    2023-03-17 [?] Bioconductor
#>  P sparseMatrixStats      1.10.0    2022-11-01 [?] Bioconductor
#>  P statmod                1.5.0     2023-01-06 [?] CRAN (R 4.2.0)
#>  P stringi                1.7.12    2023-01-11 [?] CRAN (R 4.2.0)
#>  P stringr              * 1.5.0     2022-12-02 [?] CRAN (R 4.2.0)
#>  P SummarizedExperiment * 1.28.0    2022-11-01 [?] Bioconductor
#>  P svglite                2.1.1     2023-01-10 [?] CRAN (R 4.2.0)
#>  P systemfonts            1.0.4     2022-02-11 [?] CRAN (R 4.2.0)
#>  P tibble               * 3.2.1     2023-03-20 [?] CRAN (R 4.2.0)
#>  P tidyr                * 1.3.0     2023-01-24 [?] CRAN (R 4.2.0)
#>  P tidyselect             1.2.0     2022-10-10 [?] CRAN (R 4.2.0)
#>  P tidyverse            * 2.0.0     2023-02-22 [?] CRAN (R 4.2.0)
#>  P timechange             0.2.0     2023-01-11 [?] CRAN (R 4.2.0)
#>  P tzdb                   0.4.0     2023-05-12 [?] CRAN (R 4.2.0)
#>  P UpSetR                 1.4.0     2019-05-22 [?] CRAN (R 4.2.0)
#>  P utf8                   1.2.3     2023-01-31 [?] CRAN (R 4.2.0)
#>  P vctrs                  0.6.2     2023-04-19 [?] CRAN (R 4.2.0)
#>  P vipor                  0.4.5     2017-03-22 [?] CRAN (R 4.2.0)
#>  P viridis                0.6.3     2023-05-03 [?] CRAN (R 4.2.0)
#>  P viridisLite            0.4.2     2023-05-02 [?] CRAN (R 4.2.0)
#>  P webshot                0.5.4     2022-09-26 [?] CRAN (R 4.2.0)
#>  P withr                  2.5.0     2022-03-03 [?] CRAN (R 4.2.0)
#>  P xfun                   0.39      2023-04-20 [?] CRAN (R 4.2.0)
#>  P xml2                   1.3.4     2023-04-27 [?] CRAN (R 4.2.0)
#>  P xtable                 1.8-4     2019-04-21 [?] CRAN (R 4.2.0)
#>  P XVector                0.38.0    2022-11-01 [?] Bioconductor
#>  P yaml                   2.3.7     2023-01-23 [?] CRAN (R 4.2.0)
#>  P zlibbioc               1.44.0    2022-11-01 [?] Bioconductor
#> 
#>  [1] /kyukon/data/gent/vo/000/gvo00063/Jeroen/DD_project_v2/benchmarks/lupus-n_patients/renv/library/R-4.2/x86_64-pc-linux-gnu
#>  [2] /kyukon/home/gent/460/vsc46052/.cache/R/renv/sandbox/R-4.2/x86_64-pc-linux-gnu/4df86545
#> 
#>  P ── Loaded and on-disk path mismatch.
#> 
#> ──────────────────────────────────────────────────────────────────────────────